Load the packages:
if (!require("pacman")) install.packages("pacman")
pacman::p_load("tidyverse", "tsibble", "bfast", "data.table", "mgcv","forecast",
"anytime", "fabletools", "signal", "fable", "tibble",
"sf", "sits", "gdalcubes", "terra")Import data - raw MTCI values from Sentinel-2 acquired from GEE:
df = read.csv("d:/OGH2023_data/species_mtci.csv")
str(df)## 'data.frame': 1446996 obs. of 3 variables:
## $ system.index: chr "20170329T095021_20170329T095024_T33UXR_00000000000000000042" "20170329T095021_20170329T095024_T33UXR_00000000000000000043" "20170329T095021_20170329T095024_T33UXR_00000000000000000044" "20170329T095021_20170329T095024_T33UXR_00000000000000000045" ...
## $ MTCI : num NA NA NA NA NA NA NA NA NA NA ...
## $ species_cd : chr "GB" "GB" "GB" "GB" ...
summary(df)## system.index MTCI species_cd
## Length:1446996 Min. :-8.2 Length:1446996
## Class :character 1st Qu.: 1.3 Class :character
## Mode :character Median : 3.1 Mode :character
## Mean : 3.0
## 3rd Qu.: 4.4
## Max. :17.8
## NA's :1430563
As you can see, there is a lot of NA values, also, if we plot index values there are a lot of outliers.
plot(df$MTCI)Modify column with date and change columns names:
df$system.index = as.Date(df$system.index, format = "%Y%m%d")
names(df) = c("date", "index", "type")Filtering values, removing NA, mean of duplicates:
df_clean = df %>%
drop_na() %>%
group_by(date, type) %>%
summarise(index = mean(index)) %>%
ungroup() %>%
dplyr::filter(index < 7 & index > 0 & date > "2018-03-05")
ggplot(df_clean, aes(date, index, color = type))+
geom_point()+
theme_light()df_gb = df_clean %>%
dplyr::filter(type == "GB")
ggplot(df_gb, aes(date, index))+
geom_line(color = "darkgreen", linewidth = 1)+
theme_light()Firstly, identify and replace outliers using simple linear interpolation:
df_gb$index_out = df_gb$index %>%
tsclean()
ggplot(df_gb)+
geom_point(aes(date, index), color = "red")+
geom_point(aes(date, index_out), color = "darkgreen")+
theme_light()df_gb$avg3 = rollmean(df_gb$index_out ,3, fill = NA)
df_gb$avg10 = rollmean(df_gb$index_out ,10, fill = NA)
ggplot(df_gb)+
geom_line(aes(date, index_out))+
geom_line(aes(date, avg3), color = "blue", linewidth = 1.0)+
geom_line(aes(date, avg10), color = "red", linewidth = 1.0)+
theme_light()We need to create regular (1-day) time series with NA for missing values.
df_bfast = bfastts(df_gb$index_out, df_gb$date, type = ("irregular"))
df_tibble = tibble(date = seq(as.Date(df_gb$date[1]), by = "day",
length.out = length(df_bfast)), value = df_bfast) %>%
as_tsibble(index = date) Interpolation of unknown values using ARIMA fitting (The ARIMA() model requires equal spacing between observations)
df_inter = df_tibble %>%
model(arima = ARIMA(value ~ -1 + pdq(0,1,0) + PDQ(0,0,0))) %>%
fabletools::interpolate(df_tibble)Apply Savitzky-Golay filter. n is a filter length (odd number) - test different values.
df_inter$sg = df_inter$value %>%
sgolayfilt(n = 71) Analyze the results on plot - line represent time series smoothed by S-G, red dots - original values and blue dots - interpolated values
df_inter$original = df_tibble$value
ggplot(df_inter)+
geom_point(aes(date, value), color = "blue", alpha = 0.3)+
geom_point(aes(date, original), color = "red", alpha = 0.4)+
geom_line(aes(date, sg), linewidth = 1)+
theme_light()The first step is the same as in Savitzky-Golay - we will create regular time series with NA values:
df_bfast = bfastts(df_gb$index_out, df_gb$date, type = ("irregular"))
df_tibble = tibble(date = seq(as.Date(df_gb$date[1]), by = "day",
length.out = length(df_bfast)), value = df_bfast) %>%
as_tsibble(index = date) %>%
ts() %>% #but here also we need to change date format from y-m-d to
as.data.frame()But now we can go straight to modelling without dealing with missing values!
GAM modelling with dates as predictor (Generalized Additive Mixed Models):
model = gamm(df_tibble$value ~ s(date, k = 60),
data = df_tibble, method = "REML")Predicting values using GAM model and plotting the results:
df_tibble$predicted = predict.gam(model$gam, df_tibble)
df_tibble$date = anydate(df_tibble$date)
ggplot(df_tibble)+
geom_point(aes(date, value), color = "red", alpha = 0.4)+
geom_line(aes(date, predicted), linewidth = 1)+
theme_light()Compare S-G (blue line) and GAM (black line):
df_tibble$sg = df_inter$sg
ggplot(df_tibble)+
geom_point(aes(date, value), color = "red", alpha = 0.4)+
geom_line(aes(date, predicted), alpha = 0.8, linewidth = 1)+
geom_line(aes(date, sg), color = "blue", alpha = 0.8, linewidth = 1)+
theme_light()In this part we will use Savitzky-Golay smoothing and then try to detect breaks and trends in satellite time series for the part of Poznań
Load the data (already pre-processed:)
df = read.csv("d:/OGH2023_data/poznan_ndvi_clean.csv")
str(df)## 'data.frame': 398387 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ system.index: chr "2018-04-06" "2018-04-06" "2018-04-06" "2018-04-06" ...
## $ pointid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ index : num 0.395 0.365 0.427 0.405 0.454 ...
summary(df)## X system.index pointid index
## Min. : 1 Length:398387 Min. : 1.0 Min. :-0.2184
## 1st Qu.: 99598 Class :character 1st Qu.: 450.0 1st Qu.: 0.2277
## Median :199194 Mode :character Median : 900.0 Median : 0.4004
## Mean :199194 Mean : 903.1 Mean : 0.4344
## 3rd Qu.:298791 3rd Qu.:1357.0 3rd Qu.: 0.6487
## Max. :398387 Max. :1813.0 Max. : 0.9997
df = df[,-1]
df$system.index = as.Date(df$system.index, format = "%Y-%m-%d")
names(df) = c("date", "pointid", "index")
ggplot(sample_n(df, 5000), aes(date, index))+
geom_point(alpha = 0.5)+
theme_light()Check examples examples with pointid 551 - vegetation development; 1311 and 472 - new built-up; 1623 conifer trees; 1149 built up to green space
df_sel = df %>%
dplyr::filter(pointid %in% c(472, 551,1149,1311, 1623))
ggplot(df_sel, aes(date, index,
group = pointid, color = as.factor(pointid)))+
geom_line(linewidth = 1, alpha = 0.6)+
scale_color_manual(values = c("#FF6666", "green", "purple", "#FF9900", "darkgreen"))+
theme_light()Let’s now check the just the place with vegetation development and than smooth the time series:
df_sel = df %>%
dplyr::filter(pointid %in% c(551))
ggplot(df_sel, aes(date, index,
group = pointid, color = as.factor(pointid)))+
geom_line(linewidth = 1)+
theme_light()Based on previous part, we will use Savitzky-Golay smoothing - to make it easier you can find the function with all necessary steps (e.g. producing regular time series) and applying S-G function. The input for this is id (pointid in our loaded dataframe) and input_df, with 3 variables: date, id, and index value.
sg = function(id, input_df) {
df_sel = input_df %>%
dplyr::filter(pointid == id)
df_bfast = bfastts(df_sel$index, df_sel$date, type = ("irregular"))
df_tibble = tibble(date = seq(as.Date(df_sel$date[1]), by = "day",
length.out = length(df_bfast)), value = df_bfast) %>%
as_tsibble(index = date)
df_inter = df_tibble %>%
model(arima = ARIMA(value ~ -1 + pdq(0,1,0) + PDQ(0,0,0))) %>%
fabletools::interpolate(df_tibble)
df_inter$sg = df_inter$value %>%
sgolayfilt(n = 91) # n is a filter length
df_inter$id = id
return(df_inter[,-2])
}
sg(1068, df) %>%
ggplot()+
geom_line(aes(date, sg),
alpha = 0.8, linewidth = 1)+
theme_light()First example - new built-up area:
df_sg = sg(1068, df)
df_sg_ts = df_sg[,c(1,2)] %>%
ts(frequency = 365)
df_sg_ts[,2] %>% decompose() %>%
plot()Second example - vegetation development:
df_sg = sg(551, df)
df_sg_ts = df_sg[,c(1,2)] %>%
ts(frequency = 365)
df_sg_ts[,2] %>% decompose() %>%
plot()Different method - stl: Seasonal Decomposition of Time Series by Loess:
df_sg_ts[,2] %>%
stl(t.window=13, s.window="periodic", robust=TRUE) %>%
plot()Detect breaks using bfast function - firstly, apply S-G smoothing (the data should be a regular ts() object without NAs). This is the example for pixel no 1311.
change1 = sg(1311, df) %>%
select(date, sg) %>%
ts(frequency = 365)Use bfast function - which is an iterative break detection in seasonal and trend component of a time series; plot the results and the detected break date:
fit = bfast(change1[,2], h = 0.01, season = "harmonic", max.iter = 1, breaks = 1)
plot(fit)anydate(change1[1] + fit$Time)## [1] "2021-11-22"
And another example
change1 = sg(472, df) %>%
select(date, sg) %>%
ts(frequency = 365)
fit = bfast(change1[,2], h = 0.01, season = "harmonic", max.iter = 1, breaks = 1)
plot(fit)anydate(change1[1] + fit$Time)## [1] "2020-03-01"
Then these results can be, for example, joined with spatial data - and we can produce map of new built-up areas, like in the example below.
Still, method is not perfect, you can try testing other parameters! :)
In this part, we will test some basics of SITS * package, which is for satellite image time series analysis. They are stored as earth observation data cubes. The book about SITS package can be found here
*Rolf Simoes, Gilberto Camara, Gilberto Queiroz, Felipe Souza, Pedro R. Andrade, Lorena Santos, Alexandre Carvalho, and Karine Ferreira. “Satellite Image Time Series Analysis for Big Earth Observation Data”. Remote Sensing, 13: 2428, 2021. doi:10.3390/rs13132428 Collections that can be used in SITS package are available in cloud services, such as Amazon Web Service or Microsoft’s Planetary Computer.
Firstly, we will try to find collection of Landsat imagery from MPC- you can specify your own region of interest (roi).
L8_cube = sits_cube(
source = "MPC",
collection = "LANDSAT-C2-L2",
bands = c("RED", "SWIR16", "NIR08", "CLOUD"),
roi = c("lat_min" = 50.026, "lon_min" = 19.902, "lat_max" = 50.027, "lon_max" = 19.903),
start_date = "2021-06-01",
end_date = "2022-08-01",
prgress = TRUE)
View(L8_cube)
sits_timeline(L8_cube)Select one tile with low cloud cover and plot it:
plot(L8_cube,
red = "SWIR16", green = "NIR08", blue = "RED",
date = "2021-06-18")As the time series are irregular, they need to be converted to regular data cubes before further processing in sits. It may take some time…
reg_cube = sits_regularize(
cube = L8_cube,
output_dir = tempdir(),
res = 120,
period = "P1M",
multicores = 4)And calculate NDVI:
reg_cube_ndvi = sits_apply(reg_cube,
NDVI = (NIR08 - RED) / (NIR08 + RED),
output_dir = tempdir(),
multicores = 4,
memsize = 12)
plot(reg_cube_ndvi, band = "NDVI", palette = "RdYlGn")